## Import data 
# Monthly COVID table: map the month number to its English name as an ordered
# factor (January..December), keep year as text, and coerce the monthly
# confirmed count to numeric.
covid <-
  read_csv("./data/covid_data_monthly.csv") %>%
  mutate(
    month = factor(month.name[as.numeric(month)], levels = month.name, ordered = TRUE),
    year = as.character(year),
    Confirmed_of_Month = as.numeric(Confirmed_of_Month)
  )

## take a look at covid plot 
# Confirmed cases by month: one line per year, with a point at each month.
ggplot(
  data = covid,
  mapping = aes(x = month, y = Confirmed_of_Month, colour = year)
) +
  geom_line(mapping = aes(group = year)) +
  geom_point(size = 1.5)

# Export volumes: repair the misspelled "Feburary" level, order the months
# January..December, and keep year as text.
export_volume_df <-
  read_csv("./data/cleaned data/export_volume_combined.csv") %>%
  mutate(
    month = recode(as.factor(month), "Feburary" = "February"),
    month = factor(month, levels = month.name, ordered = TRUE),
    year = as.character(year)
  )

# Total export volume per (month, year), drawn as one line per year.
export_volume_df %>%
  group_by(month, year) %>%
  summarise(sum_of_export = sum(export_volume)) %>%
  ggplot(mapping = aes(x = month, y = sum_of_export, colour = year)) +
  geom_line(mapping = aes(group = year)) +
  geom_point(size = 1.5) +
  # x-axis labels are rotated 90 degrees
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Import volumes: repair the misspelled "Feburary" level, order the months
# January..December, and keep year as text.
import_volume_df <-
  read_csv("./data/cleaned data/import_volume_combined.csv") %>%
  mutate(
    month = recode(as.factor(month), "Feburary" = "February"),
    month = factor(month, levels = month.name, ordered = TRUE),
    year = as.character(year)
  )

# Total import volume per (month, year), drawn as one line per year.
import_volume_df %>%
  group_by(month, year) %>%
  summarise(sum_of_import = sum(import_volume)) %>%
  ggplot(mapping = aes(x = month, y = sum_of_import, colour = year)) +
  geom_line(mapping = aes(group = year)) +
  geom_point(size = 1.5) +
  # x-axis labels are rotated 90 degrees
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Left-join the export and import tables onto the monthly COVID table by
# year and month, convert column names to snake_case, and label the two
# product-type columns by the table they came from.
#
# `all = TRUE` was removed: it is an argument of base::merge(), not of
# dplyr::left_join(), so it was either silently ignored or rejected depending
# on the installed dplyr version.
#
# NOTE(review): if the export and import tables each hold several rows per
# (year, month), the second join pairs every export row with every import row
# of that month -- confirm this fan-out is intended before modelling on
# combined_df.
combined_df <- list(covid, export_volume_df, import_volume_df) %>%
  reduce(left_join, by = c("year", "month")) %>%
  janitor::clean_names() %>%
  rename(
    "export_product_type" = "product_type_x",
    "import_product_type" = "product_type_y"
  )

Export volume vs. COVID-19 cases

# Linear regression of export volume on the monthly confirmed-case count.
export_fit <- lm(formula = export_volume ~ confirmed_of_month, data = combined_df)
# Print the coefficient table and fit statistics.
summary(export_fit)

Call:
lm(formula = export_volume ~ confirmed_of_month, data = combined_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-1885.0 -1684.6 -1548.5  -417.6 17170.7 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         1.930e+03  8.917e+01  21.641   <2e-16 ***
confirmed_of_month -1.344e-07  1.230e-07  -1.093    0.274    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3489 on 4444 degrees of freedom
  (2 observations deleted due to missingness)
Multiple R-squared:  0.0002687, Adjusted R-squared:  4.376e-05 
F-statistic: 1.195 on 1 and 4444 DF,  p-value: 0.2745
# Drop incomplete rows, nest the columns from confirmed_of_month through
# export_volume, fit export_volume ~ confirmed_of_month + deaths_of_month on
# each nested table, and unnest the tidy coefficient tables.
export_df <-
  combined_df %>%
  na.omit() %>%
  nest(data = confirmed_of_month:export_volume) %>%
  mutate(
    lm_fits = map(data, function(group_rows) {
      lm(export_volume ~ confirmed_of_month + deaths_of_month, data = group_rows)
    }),
    lm_results = map(lm_fits, broom::tidy)
  ) %>%
  select(month, year, lm_results) %>%
  unnest(lm_results)
  
# Density of the coefficient estimates, using only rows without any NA.
ggplot(data = na.omit(export_df), mapping = aes(x = estimate)) +
  geom_density()

# Scatter of summed export volume against confirmed cases, one point per
# (month, year, confirmed_of_month) group, with a dashed red least-squares
# line and no confidence band.
export_plot <- combined_df %>%
  group_by(month, year, confirmed_of_month) %>%
  summarize(export_sum = sum(export_volume)) %>%
  ggplot(aes(x = confirmed_of_month, y = export_sum)) +
  geom_point(alpha = 0.5) +
  scale_y_continuous() +
  # `FALSE` rather than `F`: `F` is an ordinary variable that can be reassigned
  geom_smooth(se = FALSE, color = "red", method = "lm", size = 1, linetype = 2) +
  labs(
    title = "Covid Cases vs. Export Volume",
    x = "Covid Cases",
    y = "Export Volume"
  )

# Interactive scatter of export volume against confirmed cases, coloured by
# month, with a hover label showing both values.
# The label no longer prefixes the case count with "$": it is a count, not a
# currency amount.
combined_df %>%
  mutate(
    text_label = str_c(
      "Confirmed Cases: ", confirmed_of_month,
      "\nExport Volume: ", export_volume
    )
  ) %>%
  plot_ly(
    x = ~confirmed_of_month, y = ~export_volume,
    type = "scatter", mode = "markers",
    color = ~month, text = ~text_label, alpha = 0.5
  )

Import volume vs. COVID-19 cases

# Linear regression of import volume on the monthly confirmed-case count.
import_fit <- lm(formula = import_volume ~ confirmed_of_month, data = combined_df)
# Print the coefficient table and fit statistics.
summary(import_fit)

Call:
lm(formula = import_volume ~ confirmed_of_month, data = combined_df)

Residuals:
   Min     1Q Median     3Q    Max 
-315.7 -218.6 -167.6  108.7 1011.7 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)        3.265e+02  8.343e+00   39.14   <2e-16 ***
confirmed_of_month 5.060e-09  1.150e-08    0.44     0.66    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 326.4 on 4444 degrees of freedom
  (2 observations deleted due to missingness)
Multiple R-squared:  4.352e-05, Adjusted R-squared:  -0.0001815 
F-statistic: 0.1934 on 1 and 4444 DF,  p-value: 0.6601
# Drop incomplete rows, nest the columns from confirmed_of_month through
# import_volume, fit import_volume ~ confirmed_of_month on each nested table,
# and unnest the tidy coefficient tables.
#
# `na.action` now sits inside the lm() call. It was previously written as
# `na.action = na.omit()` in the map() call, where it was not part of the
# lm() call at all; na.omit() with no argument is also not a valid na.action
# value (the function itself is what lm() expects).
import_df <-
  combined_df %>%
  na.omit() %>%
  nest(data = confirmed_of_month:import_volume) %>%
  mutate(
    lm_fits = map(
      .x = data,
      ~ lm(import_volume ~ confirmed_of_month, data = .x, na.action = na.omit)
    ),
    lm_results = map(lm_fits, broom::tidy)
  ) %>%
  select(month, year, lm_results) %>%
  unnest(lm_results)
  
# Density of the coefficient estimates from the per-group import models.
ggplot(data = import_df, mapping = aes(x = estimate)) +
  geom_density()

# Scatter of summed import volume against confirmed cases, one point per
# (month, year, confirmed_of_month) group, with a dashed red least-squares
# line and no confidence band.
import_plot <- combined_df %>%
  group_by(month, year, confirmed_of_month) %>%
  summarize(import_sum = sum(import_volume)) %>%
  ggplot(aes(x = confirmed_of_month, y = import_sum)) +
  geom_point(alpha = 0.5) +
  scale_y_continuous() +
  # `FALSE` rather than `F`: `F` is an ordinary variable that can be reassigned
  geom_smooth(se = FALSE, color = "red", method = "lm", size = 1, linetype = 2) +
  labs(
    title = "Covid Cases vs. Import Volume",
    x = "Covid Cases",
    y = "Import Volume"
  )
  

# Interactive scatter of summed import volume against confirmed cases, one
# marker per (month, year, confirmed_of_month) group, coloured by month, with
# a hover label showing both values.
# The label no longer prefixes the case count with "$": it is a count, not a
# currency amount.
combined_df %>%
  group_by(month, year, confirmed_of_month) %>%
  summarize(import_sum = sum(import_volume)) %>%
  mutate(
    text_label = str_c(
      "Confirmed Cases: ", confirmed_of_month,
      "\nImport Volume: ", import_sum
    )
  ) %>%
  plot_ly(
    x = ~confirmed_of_month, y = ~import_sum,
    type = "scatter", mode = "markers",
    color = ~month, text = ~text_label, alpha = 0.5
  )
# Lay out the import and export scatter plots together in one figure.
ggpubr::ggarrange(plotlist = list(import_plot, export_plot))